<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>

    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta content="Cask Data, Inc." name="author" />
<meta content="Copyright © 2017 Cask Data, Inc." name="copyright" />
<meta content="The CDAP User Guide: Getting Started" name="description" />


    <meta name="git_release" content="6.1.1">
    <meta name="git_hash" content="05fbac36f9f7aadeb44f5728cea35136dbc243e5">
    <meta name="git_timestamp" content="2020-02-09 08:22:47 +0800">
    <title>Example: Building a Stock Selection Pipeline</title>

    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap-theme.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/bootstrap-sphinx.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-dynamicscrollspy-4.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/abixTreeList-2.css" type="text/css" />
    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-hide-toc.css" type="text/css" />

    <script type="text/javascript">
      var DOCUMENTATION_OPTIONS = {
        URL_ROOT:    '',
        VERSION:     '6.1.1',
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: '.html',
        HAS_SOURCE:  false
      };
    </script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <script type="text/javascript" src="../_static/language_data.js"></script>

    <link rel="shortcut icon" href="../_static/favicon.ico"/>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="top" title="Cask Data Application Platform 6.1.1 Documentation" href="../index.html" />
    <link rel="up" title="CDAP 入门指南" href="index.html" />
    <link rel="next" title="示例: 分析和物联网设备数据脱敏" href="fitbit.html" />
    <link rel="prev" title="示例: 使用纽约时报 XML 数据推送" href="nytimes-xml.html" />
    <!-- block extrahead -->
    <meta charset='utf-8'>
    <meta http-equiv='X-UA-Compatible' content='IE=edge,chrome=1'>
    <meta name='viewport' content='width=device-width, initial-scale=1.0'>
    <meta name="apple-mobile-web-app-capable" content="yes">
    <!-- block extrahead end -->

</head>
<body role="document">

<!-- block navbar -->
<div id="navbar" class="navbar navbar-inverse navbar-default navbar-fixed-top">
    <div class="container-fluid">
      <div class="row">
        <div class="navbar-header">
          <!-- .btn-navbar is used as the toggle for collapsed navbar content -->
          <a class="navbar-brand" href="../table-of-contents/../../index.html">
            <span><img alt="CDAP logo" src="../_static/cdap_logo.svg"/></span>
          </a>

          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse">
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>

          <div class="pull-right">
            <div class="dropdown version-dropdown">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown"
                role="button" aria-haspopup="true" aria-expanded="false">
                v 6.1.1 <span class="caret"></span>
              </a>
              <ul class="dropdown-menu">
                <li><a href="//docs.cdap.io/cdap/5.1.2/en/index.html">v 5.1.2</a></li>
                <li><a href="//docs.cdap.io/cdap/4.3.4/en/index.html">v 4.3.4</a></li>
              </ul>
            </div>
          </div>
          <form class="navbar-form navbar-right navbar-search" action="../search.html" method="get">
            <div class="form-group">
              <div class="navbar-search-image material-icons"></div>
              <input type="text" name="q" class="form-control" placeholder="  Search" />
            </div>
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>

          <div class="collapse navbar-collapse nav-collapse navbar-right navbar-navigation">
            <ul class="nav navbar-nav"><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../index.html">简介</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link current" href="../table-of-contents/../../guides.html">手册</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../reference-manual/index.html">参考</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../faqs/index.html">帮助</a></li>
            </ul>
          </div>

        </div>
      </div>
    </div>
  </div><!-- block navbar end -->
<!-- block main content -->
<div class="main-container container">
  <div class="row"><div class="col-md-2">
      <div id="sidebar" class="bs-sidenav scrollable-y-outside" role="complementary">
<!-- theme_manual: user-guide -->
<!-- theme_manual_highlight: guides -->
<!-- sidebar_title_link: ../table-of-contents/../../guides.html -->

  <div role="note" aria-label="manuals links"><h3><a href="../table-of-contents/../../guides.html">Guides</a></h3>

    <ul class="this-page-menu">
      <li class="toctree-l1"><b><a href="../table-of-contents/../../user-guide/index.html" rel="nofollow">用户手册</a></b>
      <nav class="pagenav">
      <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html"> 简介</a></li>
<li class="toctree-l1"><a class="reference internal" href="../overview.html"> 概述</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html"> 入门指南</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="campaign.html">MySQL 客户数据</a></li>
<li class="toctree-l2"><a class="reference internal" href="nytimes-xml.html">纽约时报 XML 数据推送</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="#">股票选择</a></li>
<li class="toctree-l2"><a class="reference internal" href="fitbit.html">物联网 IoT 设备数据</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../data-preparation/index.html"> 数据预处理</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/concepts.html">      概念</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/directives/index.html">      数据处理指令</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/catalog-lookup.html">catalog-lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/change-column-case.html">change-column-case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/changing-case.html">changing-case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/cleanse-column-names.html">cleanse-column-names</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/columns-replace.html">columns-replace</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/copy.html">copy</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/cut-character.html">cut-character</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/decode.html">decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/diff-date.html">diff-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/drop.html">drop</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/encode.html">encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/extract-regex-groups.html">extract-regex-groups</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/fail.html">fail</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/fill-null-or-empty.html">fill-null-or-empty</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-row-if-matched.html">filter-row-if-matched</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-row-if-true.html">filter-row-if-true</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-rows-on.html">filter-rows-on</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/find-and-replace.html">find-and-replace</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/flatten.html">flatten</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/format-date.html">format-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/format-unix-timestamp.html">format-unix-timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/generate-uuid.html">generate-uuid</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/hash.html">hash</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/increment-variable.html">increment-variable</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/index-split.html">index-split</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/invoke-http.html">invoke-http</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/json-path.html">json-path</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/keep.html">keep</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/mask-number.html">mask-number</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/mask-shuffle.html">mask-shuffle</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/merge.html">merge</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-avro-file.html">parse-as-avro-file</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-avro.html">parse-as-avro</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-csv.html">parse-as-csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-date.html">parse-as-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-excel.html">parse-as-excel</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-fixed-length.html">parse-as-fixed-length</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-hl7.html">parse-as-hl7</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-json.html">parse-as-json</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-log.html">parse-as-log</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-simple-date.html">parse-as-simple-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-xml.html">parse-as-xml</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-timestamp.html">parse-timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-xml-to-json.html">parse-xml-to-json</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/quantize.html">quantize</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/rename.html">rename</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/send-to-error.html">send-to-error</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-charset.html">set-charset</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-column.html">set-column</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-columns.html">set-columns</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-record-delim.html">set-record-delim</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-type.html">set-type</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-variable.html">set-variable</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-by-separator.html">split-by-separator</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-email.html">split-email</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-to-columns.html">split-to-columns</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-to-rows.html">split-to-rows</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-url.html">split-url</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/stemming.html">stemming</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/swap.html">swap</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/table-lookup.html">table-lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/text-distance.html">text-distance</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/text-metric.html">text-metric</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/trim.html">trim</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/url-decode.html">url-decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/url-encode.html">url-encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-csv.html">write-as-csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-json-map.html">write-as-json-map</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-json-object.html">write-as-json-object</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/xpath.html">xpath</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/functions/index.html">      函数</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/json-functions.html">JSON 函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/type-functions.html">类型函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/geofence-functions.html">地理围栏函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/dq-functions.html">数据质量函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/date-functions.html">日期函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/ddl-functions.html">DDL 函数</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/service/index.html">      服务</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/admin.html">行政和管理服务</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/connection-properties.html">连接属性</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/connections.html">连接服务</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/execution.html">数据处理指令执行</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/request.html">请求格式规范</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/schema-registry.html">Schema 注册库</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/services.html">数据预处理服务</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/performance.html">性能</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/exclusion-and-aliasing.html">排除与别名</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../pipelines/index.html"> 数据流管道</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/concepts-design.html"> 概念与设计</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/getting-started.html"> 入门指南</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/studio.html"> CDAP 数据流设计器</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/creating-pipelines.html"> 创建数据流管道</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/running-pipelines.html"> 运行数据流管道</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/plugin-management.html"> 插件管理</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/plugins/index.html"> 插件参考</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/actions/index.html"> Action Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/sources/index.html"> Source Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/transforms/index.html"> Transform Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/analytics/index.html"> Analytic Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/sinks/index.html"> Sink Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/shared-plugins/index.html"> Shared Plugins</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../pipelines/plugins/shared-plugins/core.html">CoreValidator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/post-run-plugins/index.html"> Post-run Plugins</a><ul class="simple">
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../mmds/index.html"> 数据分析</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../mmds/concepts.html"> Concepts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../mmds/feature-gen.html"> Feature Generation</a></li>
<li class="toctree-l2"><a class="reference internal" href="../mmds/modeling.html"> Modeling</a></li>
<li class="toctree-l2"><a class="reference internal" href="../mmds/example.html"> Example</a></li>
</ul>
</li>
</ul>
</nav>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../developer-manual/index.html" rel="nofollow">开发手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../admin-manual/index.html" rel="nofollow">管理手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../integrations/index.html" rel="nofollow">集成手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../examples-manual/index.html" rel="nofollow">最佳实践</a>
      </li>
    </ul>
  </div></div>
    </div><div class="col-md-8 content" id="main-content">
    
  <div class="section" id="example-building-a-stock-selection-pipeline">
<span id="tutorials-stocks"></span><h1>Example: Building a Stock Selection Pipeline<a class="headerlink" href="#example-building-a-stock-selection-pipeline" title="Permalink to this headline">🔗</a></h1>
<div class="section" id="introduction">
<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this headline">🔗</a></h2>
<p>This tutorial demonstrates how to use CDAP’s 数据预处理 and 数据流管道 to build a stock selection pipeline that ingests market data and identifies a set of stocks that should be purchased.</p>
</div>
<div class="section" id="scenario">
<h2>Scenario<a class="headerlink" href="#scenario" title="Permalink to this headline">🔗</a></h2>
<p>You work in the financial industry and trade in U.S. equities markets. To select top stocks for purchase, you need to ingest large amounts of market data with many different variables, process this data, and return your stock picks.</p>
<ul class="simple">
<li>You want to write a stock selection pipeline that uses the criteria given by <a class="reference external" href="https://en.wikipedia.org/wiki/Magic_formula_investing">Greenblatt’s Magic Formula</a>.</li>
<li>You want to write your selections to a database that will be read by a program which submits your selections to the NYSE.</li>
</ul>
</div>
<div class="section" id="data">
<h2>Data<a class="headerlink" href="#data" title="Permalink to this headline">🔗</a></h2>
<p>Click below to download a <cite>.csv</cite> file containing the data necessary to complete the tutorial.</p>
<p><a class="reference download internal" download="" href="../_downloads/38e3233c496d538d6cfbfce21b352bf4/stock_data.csv"><code class="xref download docutils literal notranslate"><span class="pre">stock_data.csv</span></code></a></p>
</div>
<div class="section" id="video-tutorial">
<h2>Video Tutorial<a class="headerlink" href="#video-tutorial" title="Permalink to this headline">🔗</a></h2>
</div>
<div class="section" id="step-by-step-walkthrough">
<h2>Step-by-Step Walkthrough<a class="headerlink" href="#step-by-step-walkthrough" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="loading-the-data">
<h3>Loading the Data<a class="headerlink" href="#loading-the-data" title="Permalink to this headline">🔗</a></h3>
<p>First, download the file from the <cite>Data</cite> section above.</p>
<p>To begin, navigate to the 数据预处理 tab from the CDAP homepage. In 数据预处理, choose the arrow on the left hand side.</p>
<p>Upload the <cite>stock_data.csv</cite> from the <cite>File System.</cite></p>
</div>
<div class="section" id="background">
<h3>Background<a class="headerlink" href="#background" title="Permalink to this headline">🔗</a></h3>
<p>You are building a stock selection pipeline. The pipeline takes in all stocks on the NYSE and, at each stage, reduces the number of stocks that are under consideration for purchase.</p>
<p>You are implementing a popular strategy in this tutorial known as <a class="reference external" href="https://en.wikipedia.org/wiki/Magic_formula_investing">Greenblatt’s Magic Formula</a>. You will adjust the strategy
slightly:</p>
<ol class="arabic simple">
<li>Establish a minimum market capitalization greater than $50 million.</li>
<li>Exclude utility and financial stocks.</li>
<li>Determine company’s return on capital = EBIT / (net fixed assets + working capital).</li>
<li>Rank all companies above chosen market capitalization by highest return on capital.</li>
<li>Invest in 20 highest ranked companies.</li>
</ol>
</div>
<div class="section" id="establishing-the-minimum-market-capitalization">
<h3>Establishing the Minimum Market Capitalization<a class="headerlink" href="#establishing-the-minimum-market-capitalization" title="Permalink to this headline">🔗</a></h3>
<p>First, you want to establish a minimum market capitalization.</p>
<p>Start by selecting the <cite>stock_data.csv</cite> tab. Choose the drop-down menu for the <cite>body</cite> column and apply Parse &gt; CSV with the ‘Set First Row as Header’ option selected. From the <cite>body</cite> column drop-down menu, choose <cite>Delete Column</cite> to delete the <cite>body</cite> column.</p>
<p>To calculate the market capitalization of the company, you want to find the share price multiplied by the number of outstanding shares. You will express this as:</p>
<p><cite>estimated_shares_outstanding * (high + low) / 2</cite></p>
<p>where high and low are the stock’s daily high and low prices.</p>
<p>You can use a custom <a class="reference external" href="http://commons.apache.org/proper/commons-jexl/reference/examples.html">JEXL</a> expression to calculate the <cite>market_capitalization</cite>.</p>
<p>First, you need to set the type of all the involved variables to the correct data type. Currently, <cite>high</cite>, <cite>low</cite>, and <cite>estimated_shares_outstanding</cite> are all strings.</p>
<p>At the bottom of the screen, type the following directive:</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/set-type.jpeg"><img alt="../_images/set-type.jpeg" class="bordered-image" src="../_images/set-type.jpeg" style="width: 500px;" /></a>
</div>
<p>This will change the data type for <cite>high</cite> to a double. Repeat this for <cite>low</cite> and <cite>estimated_shares_outstanding</cite>. All three of these columns should now be doubles.</p>
<p>Now, choose the <cite>estimated_shares_outstanding</cite> drop-down column and select the Custom Transform option.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/market_cap.jpeg"><img alt="../_images/market_cap.jpeg" class="bordered-image" src="../_images/market_cap.jpeg" style="width: 500px;" /></a>
</div>
<p>The result of the calculation was stored in <cite>estimated_shares_outstanding</cite> (since this was the column you selected for the custom expression), so rename it <cite>market_capitalization</cite> by clicking once on the column name and typing the new text.</p>
<p>Finally, you want to filter out all companies with a market capitalization under $50 million. Scanning the data on the screen, you will see that no companies displayed in 数据预处理 have a market cap under $50 million. However, 数据预处理 only samples 100 rows from the dataset, so there may be companies that do have a market cap under $50 million elsewhere in the data.</p>
<p>To filter out these small cap companies, select the <cite>market_capitalization</cite> column and choose the Filter option. Apply the custom condition “&gt;50000000”, as shown below.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/under50.jpeg"><img alt="../_images/under50.jpeg" class="bordered-image" src="../_images/under50.jpeg" style="width: 500px;" /></a>
</div>
</div>
<div class="section" id="excluding-financial-and-utility-stocks">
<h3>Excluding Financial and Utility Stocks<a class="headerlink" href="#excluding-financial-and-utility-stocks" title="Permalink to this headline">🔗</a></h3>
<p>Your next step is to exclude financial and utility stocks. This is a similar step to establishing the minimum market capitalization.</p>
<p>Select the drop down for the <cite>gics_sector</cite> column and choose Filter. Choose “Remove Rows” if the column contains “Financials” (as shown below). Repeat this setup for “Utilities.”</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/financials.jpeg"><img alt="../_images/financials.jpeg" class="bordered-image" src="../_images/financials.jpeg" style="width: 500px;" /></a>
</div>
</div>
<div class="section" id="calculate-return-on-capital-roc">
<h3>Calculate Return on Capital (ROC)<a class="headerlink" href="#calculate-return-on-capital-roc" title="Permalink to this headline">🔗</a></h3>
<p>Your final step is to calculate the Return on Capital (ROC). ROC is defined as:</p>
<p><cite>EBIT / (net fixed assets + working capital)</cite></p>
<p>or equivalently (using our columns):</p>
<p><cite>earnings_before_interest_and_tax / (fixed_assets + (total_equity - total_liabilities))</cite></p>
<p>Before you can calculate the ROC for each company, you need to convert the type from String to Double for the columns used in the expression: <cite>earnings_before_interest_and_tax</cite>, <cite>fixed_assets</cite>, <cite>total_equity</cite>, and <cite>total_liabilities</cite>. This can be achieved by using the <cite>set-type</cite> directive. For example, you should apply <cite>set-type fixed_assets double</cite> (in the directive prompt at the bottom of the screen).</p>
<p>Once you have converted these columns, select the <cite>total_equity</cite> column drop-down menu and choose “Custom Transformation.” Apply the transformation <cite>earnings_before_interest_and_tax / (fixed_assets + (total_equity - total_liabilities))</cite>.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/roc.jpeg"><img alt="../_images/roc.jpeg" class="bordered-image" src="../_images/roc.jpeg" style="width: 500px;" /></a>
</div>
<p>Since the result has been stored in the <cite>total_equity</cite> column, rename this column to <cite>roc</cite>.</p>
<p>Finally, you would like to express the ROC as a percentage, rather than a decimal.</p>
<p>Select the drop-down menu for the <cite>roc</cite> column. Choose Calculate &gt; Multiply, and multiply by 100. You will see that the <cite>roc</cite> column now contains the return on capital as a percentage.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/percentage.jpeg"><img alt="../_images/percentage.jpeg" class="bordered-image" src="../_images/percentage.jpeg" style="width: 500px;" /></a>
</div>
</div>
<div class="section" id="creating-the-pipeline-for-picking-the-top-20-stocks">
<h3>Creating the Pipeline for Picking the Top 20 Stocks<a class="headerlink" href="#creating-the-pipeline-for-picking-the-top-20-stocks" title="Permalink to this headline">🔗</a></h3>
<p>Because 数据预处理 only displays and operates on 100 records, you need a way to operationalize your logic for the whole dataset. Click “Create Pipeline” and choose Batch.</p>
<p>In this section, you will create a pipeline that will ingest all the stock data, filter by the criteria above, choose the top 20 stocks by ROC, and write to a dataset.</p>
<p>When you initially create the pipeline, you will see the view below.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/percentage.jpeg"><img alt="../_images/percentage.jpeg" class="bordered-image" src="../_images/percentage.jpeg" style="width: 500px;" /></a>
</div>
<p>You need a way to feed the output from the Wrangler (数据预处理) node into a node which will select the top 20 stocks.</p>
<p>You can turn to the Hub for the Top-N plugin. CDAP comes bundled with many useful plugins. However, the Hub - which is an open app store for Big Data Applications - contains many more.</p>
<p>Click <cite>Hub</cite> in the upper right-hand corner to open the Hub.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/percentage.jpeg"><img alt="../_images/percentage.jpeg" class="bordered-image" src="../_images/percentage.jpeg" style="width: 500px;" /></a>
</div>
<p>In the “Plugins” section, choose “Top-N.”</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/topn.jpeg"><img alt="../_images/topn.jpeg" class="bordered-image" src="../_images/topn.jpeg" style="width: 500px;" /></a>
</div>
<p>Deploy the Top-N application. Save your pipeline - giving it the name “StockPipeline” - and refresh the page. You will see the Top-N plugin appear in the Analytics section of the plugin menu on the left side of your screen.</p>
<p>Add a Top-N node to the canvas, as well as an Avro Time Partitioned Dataset sink.</p>
<p>Name the Avro Time Partitioned Dataset sink <cite>StockSink</cite> and also specify <cite>StockSink</cite> as the “Dataset Name.”</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/stocksink.jpeg"><img alt="../_images/stocksink.jpeg" class="bordered-image" src="../_images/stocksink.jpeg" style="width: 500px;" /></a>
</div>
<p>In the Top-N plugin, specify the <cite>field</cite> to be “roc” (since this is the column by which you want the records ranked) and the <cite>size</cite> to be 20 (since you want the top 20 stocks).</p>
<p>Connect the nodes in the order shown below.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/pipeline.jpeg"><img alt="../_images/pipeline.jpeg" class="bordered-image" src="../_images/pipeline.jpeg" style="width: 800px;" /></a>
</div>
<p>Now click “Deploy” (found in the upper right-hand corner). Once the pipeline is deployed, press “Run.”</p>
<p>Click the StockSink and choose “View Details.” Here, you can run a query to see the top 20 stocks that were selected. You can see that the top 5, in order, are: BBBY, BIIB, AME, AMAT, and BMY.</p>
<p>The database can be queried using RESTful calls for a program which can execute the trades on the NYSE.</p>
</div>
</div>
</div>

</div>
    <div class="col-md-2">
      <div id="right-sidebar" class="bs-sidenav scrollable-y" role="complementary">
        <div id="localtoc-scrollspy">
        </div>
      </div>
    </div></div>
</div>
<!-- block main content end -->
<!-- block footer -->
<footer class="footer">
      <div class="container">
        <div class="row">
          <div class="col-md-2 footer-left"><a title="示例: 使用纽约时报 XML 数据推送" href="nytimes-xml.html">Previous</a></div>
          <div class="col-md-8 footer-center"><a class="footer-tab-link" href="../table-of-contents/../../reference-manual/licenses/index.html">Copyright</a> &copy; 2014-2020 Cask Data, Inc.&bull; <a class="footer-tab-link" href="//docs.cask.co/cdap/6.1.1/cdap-docs-6.1.1-web.zip" rel="nofollow">Download</a> an archive or
<a class="footer-tab-link" href="//docs.cask.co/cdap">switch the version</a> of the documentation
          </div>
          <div class="col-md-2 footer-right"><a title="示例: 分析和物联网设备数据脱敏" href="fitbit.html">Next</a></div>
        </div>
      </div>
    </footer>
<!-- block footer end -->
<script type="text/javascript" src="../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script><script type="text/javascript" src="../_static/js/bootstrap-sphinx.js"></script><script type="text/javascript" src="../_static/js/abixTreeList-2.js"></script><script type="text/javascript" src="../_static/js/cdap-dynamicscrollspy-4.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script><script type="text/javascript" src="../_static/js/copy-to-clipboard.js"></script><script type="text/javascript" src="../_static/js/jquery.mousewheel.min.js"></script><script type="text/javascript" src="../_static/js/jquery.mCustomScrollbar.js"></script><script type="text/javascript" src="../_static/js/js.cookie.js"></script><script type="text/javascript" src="../_static/js/tabbed-parsed-literal-0.2.js"></script><script type="text/javascript" src="../_static/js/cdap-onload-javascript.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script>
    <script src="https://cdap.gitee.io/docs/cdap/json-versions.js"></script>
  </body>
</html>